These are all definitions that hold for a given number of consecutive cycles of a given user. These metrics can be defined as running metrics to reflect the evolution of the symptoms. Here, we will define these metrics over 6 cycles.
The duration is defined as the average number of days per cycle in which the symptom is reported.
\[ d = \frac{N}{C} \]
# Example tracking history: 6 cycles (rows) x 26 cycle days (columns).
# Cycles 1, 3 and 5 report the symptom on days 3 and 4; the others are empty.
d_wide = matrix(
  c(0,0,1,1,0,0,rep(0,20),
    0,0,0,0,0,0,rep(0,20),
    0,0,1,1,0,0,rep(0,20),
    0,0,0,0,0,0,rep(0,20),
    0,0,1,1,0,0,rep(0,20),
    0,0,0,0,0,0,rep(0,20)),
  nrow = 6, byrow = TRUE)

# Duration: average number of symptom days per symptomatic cycle
# (cycles without any report are excluded from the denominator).
# Returns 0 when the symptom is never reported.
cvdt_duration = function(d_wide){
  symptomatic = which(rowSums(d_wide) > 0)
  if(length(symptomatic) == 0){
    return(0)
  }
  sum(d_wide[symptomatic, ]) / length(symptomatic)
}
cvdt_duration(d_wide) ## [1] 2
where \(N\) is the total number of days on which the symptom is reported and \(C\) is the number of cycles in which the symptom is reported at least once.
The timing is the day in the cycle at which the symptom is reported on average (i.e. the average cycle day on which the symptoms are reported).
\[ t = \frac{\sum d^{\tt TB}}{N} \] where \(d\) is the cycleday (from -18 to 7) and \(\tt TB\) is a binary variable whose value is 1 when the symptom is reported and 0 when not.
# Timing: mean cycle day on which the symptom is reported, on the
# -par$D .. par$Df cycle-day axis (see the definition of \(t\) above).
# mean() over an empty selection yields NaN when nothing is reported.
cvdt_timing = function(d_wide){
  day_grid = matrix(rep(-par$D:par$Df, nrow(d_wide)),
                    nrow = nrow(d_wide), byrow = TRUE)
  mean(day_grid[d_wide > 0])
}
cvdt_timing(d_wide)## [1] -15.5
The variability is the standard deviation of the number of symptoms reported per cycle: the more uneven the per-cycle counts, the higher the variability.
\[ v = \tt sd(n_c) \] where \(n_c\) is the number of symptoms reported in cycle \(c\).
# Variability: standard deviation of the per-cycle number of symptom days.
cvdt_variability = function(d_wide){
  sd(rowSums(d_wide))
}
cvdt_variability(d_wide)## [1] 1.095445
The cyclicity is the probability that the symptom is reported in a predictable window of time with respect to the end of the menstrual cycle.
# Select a window of `nd` cycle days that best captures the symptom episode.
# average_profile: per-cycleday symptom frequency (length par$D + par$Df + 1)
# nd: window width in days; timing: mean symptom cycle day (cf. cvdt_timing)
# Returns a logical vector of length D flagging the selected days.
find_window = function(average_profile, nd, timing){
D = length(average_profile)
d = sort(order(average_profile, decreasing = TRUE)[1:nd]) # we take the days with the highest average profile
# Fall back to a fixed window centered on `timing` when the top-nd days are
# either off-center relative to `timing` or too fragmented (more than one gap).
conditions = (abs(mean(c(-par$D:par$Df)[d]) - timing)>(nd/1.5)) | # window not centered on timing
(length(d)>1) & (sum(diff(d)>1)>1) # too many interruptions in d, we take a continuous window of nd around timing
if(conditions){
# j: index of `timing` on the 1..D scale (cycle day -par$D maps to index 1)
j = round(timing)+par$D+1; j_start = (j - floor(nd/2));j_end = (j+ceiling(nd/2)-1)
# clamp the window inside [1, D] while keeping its width equal to nd
if(j_start < 1){j_start = 1; j_end = nd}; if(j_end > D){j_end = D; j_start = D-nd+1}
d = j_start:j_end
}
db = rep(FALSE,D)
db[d] = TRUE
return(db)
}
# Cyclicity statistic: contrast between the average symptom frequency inside
# the window selected by find_window() and outside of it, scaled by
# mean(c(p_d, p_d, 1)). Returns 0 when the symptom is too rare (fewer than
# two symptomatic cycles or fewer than two reports) or when the window would
# cover all cycle days.
# X: cycles x cycledays binary matrix; plot: if TRUE, draw the average
# profile with the selected window highlighted.
compute_cyclicity_statistics = function(X, plot = FALSE){
j = which(apply(X, 1, sum)>0) # cycles with at least one report
if(length(j)>1){
N = sum(X);
if(N > 1){
C = nrow(X); D = ncol(X);
duration = cvdt_duration(X); timing = cvdt_timing(X)
# average profile #average_profile = apply(X, 2, mean)
# per-cycleday frequency, averaged over symptomatic cycles only
average_profile = apply(X, 2, sum); average_profile = average_profile/length(j)
# finding the window
nd = round(duration+2*(C-duration)/C) # number of days for the window; We give a larger margin for short durations
if(nd<D){
d = find_window(average_profile, nd, timing)
# p_d: mean frequency inside the window; p_o: outside the window
p_d = mean(average_profile[d]); p_o = mean(average_profile[!d])
# NOTE(review): p_d appears twice in the normalization term — possibly
# mean(c(p_d, p_o, 1)) was intended; confirm before reusing this statistic.
stat = (p_d - p_o)/mean(c(p_d,p_d,1))
if(plot){plot(c(-par$D:par$Df), average_profile, type = "b", ylim = c(0,1), col = d+1, pch = c(1,16)[d+1])}
}else{stat = 0}
}else{stat = 0}
}else{stat = 0}
return(stat)
}D = par$D + par$Df +1 # D: total number of cycle days (fused line from report extraction)
# Null distribution of the cyclicity statistic: for each possible number of
# symptom days per cycle (d = 0..D), simulate 200 random tracking matrices
# (C = 100 cycles, d*C symptom days scattered uniformly at random) and record
# the statistic, so observed values can be compared against chance levels.
par$dict_cyclicity_stat = data.frame()
for(d in 0:D){
cat(d,"\n") # progress
C = 100; N = d*C; S = C*D
statistics = numeric(200) # preallocated instead of grown with c()
for(i in 1:200){
X = matrix(sample(c(rep(1,N),rep(0,S - N))), nrow = C, ncol = D)
# compute the statistic once per simulated matrix (the original called
# compute_cyclicity_statistics() twice and discarded the first result,
# doubling the runtime for identical output)
statistics[i] = compute_cyclicity_statistics(X)
}
par$dict_cyclicity_stat = rbind(par$dict_cyclicity_stat, data.frame(d = d, statistics = statistics))
}## 0
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
# Median of the null cyclicity statistic as a function of d (symptom days per
# cycle), then the full null distributions as histograms and densities.
# (Several statements below are fused with knitted output lines.)
plot(aggregate(statistics ~ d, par$dict_cyclicity_stat, median), ylim = c(-0.2,1))
abline(h = 0)ggplot(par$dict_cyclicity_stat, aes(x = statistics, fill = factor(d))) +
geom_histogram(binwidth = 0.01)+
facet_grid(d ~ ., scale = "free_y")+xlim(c(-1,1))## Warning: Removed 54 rows containing missing values (geom_bar).
# cvdt_cyclicity (fused onto the next line): the cyclicity metric is the
# statistic clipped at 0.
ggplot(par$dict_cyclicity_stat, aes(x = statistics, col = factor(d))) +
geom_density(bw = 0.1)+xlim(c(-1,1))cvdt_cyclicity = function(d_wide){
cyclicity = max(0,compute_cyclicity_statistics(d_wide))
return(cyclicity)
}
cvdt_cyclicity(d_wide)## [1] 0.8571429
cvdt_cyclicity(X)## [1] 0
Loading data
load(paste0(IO$output_data, "users.Rdata"), verbose = TRUE)## Loading objects:
## users
load(paste0(IO$output_data, "cycles_m.Rdata"), verbose = TRUE)## Loading objects:
## cycles_m
colnames(cycles_m)## [1] "cycle_id_m" "n_obs"
## [3] "n_days_obs_lut_D" "n_days_obs_lut_7"
## [5] "n_days_obs_foll_Df" "n_TB"
## [7] "first_TB" "last_TB"
## [9] "TB_stretch" "user_id"
## [11] "cycle_nb_m" "age"
## [13] "country" "height"
## [15] "weight" "BC"
## [17] "n_days_obs_D_Df" "n_days_obs_lut_D_7"
## [19] "f_D_7" "f_7_0"
## [21] "f_0_7" "tracking_cluster_n"
## [23] "tracking_cluster" "tracking_group"
## [25] "cycle_nb_m_rel" "init_TB_group"
## [27] "valid_transition" "cycle_nb_m_from_trans"
head(cycles_m)## cycle_id_m n_obs n_days_obs_lut_D
## 1 002f1fd24f5f162b50392c08c15ce6d15ee861f8_10 23 16
## 2 002f1fd24f5f162b50392c08c15ce6d15ee861f8_11 25 17
## 3 002f1fd24f5f162b50392c08c15ce6d15ee861f8_12 15 15
## 4 002f1fd24f5f162b50392c08c15ce6d15ee861f8_13 16 14
## 5 002f1fd24f5f162b50392c08c15ce6d15ee861f8_14 12 11
## 6 002f1fd24f5f162b50392c08c15ce6d15ee861f8_2 2 0
## n_days_obs_lut_7 n_days_obs_foll_Df n_TB first_TB last_TB TB_stretch
## 1 5 7 0 NA NA NA
## 2 6 8 0 NA NA NA
## 3 4 0 0 NA NA NA
## 4 7 0 0 NA NA NA
## 5 5 0 0 NA NA NA
## 6 0 2 0 NA NA NA
## user_id cycle_nb_m age country
## 1 002f1fd24f5f162b50392c08c15ce6d15ee861f8 10 20 United Kingdom
## 2 002f1fd24f5f162b50392c08c15ce6d15ee861f8 11 20 United Kingdom
## 3 002f1fd24f5f162b50392c08c15ce6d15ee861f8 12 20 United Kingdom
## 4 002f1fd24f5f162b50392c08c15ce6d15ee861f8 13 20 United Kingdom
## 5 002f1fd24f5f162b50392c08c15ce6d15ee861f8 14 20 United Kingdom
## 6 002f1fd24f5f162b50392c08c15ce6d15ee861f8 2 20 United Kingdom
## height weight BC n_days_obs_D_Df n_days_obs_lut_D_7 f_D_7
## 1 155 46 pill 23 11 1.0000000
## 2 155 46 pill 25 11 1.0000000
## 3 155 46 pill 15 11 1.0000000
## 4 155 46 pill 14 7 0.6363636
## 5 155 46 pill 11 6 0.5454545
## 6 155 46 pill 2 0 0.0000000
## f_7_0 f_0_7 tracking_cluster_n tracking_cluster tracking_group
## 1 0.7142857 0.875 1 regular_tracking regular tracking
## 2 0.8571429 1.000 1 regular_tracking regular tracking
## 3 0.5714286 0.000 1 regular_tracking sporadic tracking
## 4 1.0000000 0.000 1 regular_tracking sporadic tracking
## 5 0.7142857 0.000 2 menstrual_tracking sporadic tracking
## 6 0.0000000 0.250 2 menstrual_tracking menstrual tracking
## cycle_nb_m_rel init_TB_group valid_transition cycle_nb_m_from_trans
## 1 8 [0,1) invalid NA
## 2 9 [0,1) invalid NA
## 3 10 [0,1) invalid NA
## 4 11 [0,1) invalid NA
## 5 12 [0,1) invalid NA
## 6 0 [0,1) invalid NA
Selecting stretches of at least 6 consecutive cycles of regular tracking with at least 6 TB reports in these consecutive cycles.
# Window length (in cycles) over which the cvdt metrics are computed.
n_consecutive_cycles = 6
# Sort cycles chronologically within each user before computing running sums.
o = order(cycles_m$user_id, cycles_m$cycle_nb_m)
cycles_m = cycles_m[o,]
# at least 6 TB in 6 consecutive cycles (see `valid` below)
# Running sum of `x` over windows of `n` consecutive values (applied per user
# through ave() below); the first n-1 positions are back-filled with the first
# fully-defined value so the output has the same length as `x`, as ave()
# requires.
# NOTE(review): the row appended at iteration i is x[i:(N-i+1)], which shrinks
# by 2 elements per iteration; a sliding window would be x[i:(N-n+i)]. The
# result relies on rbind()'s data.frame recycling behavior — confirm the sums
# are the intended window sums before reuse.
# NOTE(review): growing X with rbind() in a loop is O(n^2); stats::filter() or
# embed() would compute running sums directly.
running_cumsum = function(x, n = n_consecutive_cycles){
X = data.frame()
N = length(x)
for(i in 1:n){
if(i <= N){
X = rbind(X, x[i:(N-i+1)])
}
}
y = apply(X, 2, sum)
i = min(N,n)
# back-fill the leading positions with the first complete-window value
if(i > 1){y[1:(i-1)] = y[i]}
return(y)
}
# Running number of TB reports over n_consecutive_cycles, computed per user.
n_TB_running = ave(cycles_m$n_TB, by = cycles_m$user_id, FUN = running_cumsum)
# A cycle is valid when the user tracks regularly, reports at least 6 TB over
# the running window, and uses a birth-control type listed in par$BC_dict.
valid = ((cycles_m$tracking_group == "regular tracking") & (n_TB_running >= 6) & (cycles_m$BC %in% par$BC_dict$name))
valid[is.na(valid)] = FALSE
# Flag the elements of a logical vector that belong to a run of TRUE values
# of length at least `n` (i.e. a valid stretch of consecutive cycles).
valid_stretches = function(x, n = n_consecutive_cycles){
runs = rle(x)
keep = runs$values & (runs$lengths >= n)
rep(keep, times = runs$lengths)
}
# Number the runs of equal values in `x`: element i gets the index (1, 2, ...)
# of the run it belongs to. Used with ave() below to give each tracking
# stretch of a user its own number.
stretches_number = function(x){
r = rle(x)
# seq_along() (not 1:length()) so that a zero-length input returns
# integer(0) instead of raising an error from rep(c(1, 0), integer(0)).
ns = rep(seq_along(r$lengths), r$lengths)
return(ns)
}
# Number each user's runs of valid/invalid cycles, and flag cycles belonging
# to a valid stretch (>= n_consecutive_cycles consecutive valid cycles).
stretches_num = ave(valid, by = cycles_m$user_id, FUN = stretches_number)
stretches = ave(valid, by = cycles_m$user_id, FUN = valid_stretches)
sum(stretches)## [1] 160861
cycle_ids = cycles_m$cycle_id_m[which(stretches)]
user_ids = unique(cycles_m$user_id[which(stretches)])
# stretch_nb is 0 for cycles outside a valid stretch (multiplying by the
# logical `stretches` zeroes them), so their stretch_id ends in "_s0".
cycles_m$stretch_nb = stretches_num * stretches
cycles_m$stretch_id = paste0(cycles_m$user_id, "_s", cycles_m$stretch_nb)Collect tracking data for these cycles
# Collect the daily tracking rows for the selected cycles, batch by batch
# (each batch file loads a `days` data frame).
batches = unique(users$batch[which(users$user_id %in% user_ids)])
days_selected_cycles = data.frame()
tic()
for(b in unique(batches)){ # unique(c(190,batches[1:10]))
cat(b, "\n")
load(paste0(IO$output_data, "days/days_",b,".Rdata"), verbose = TRUE)
# keep TB rows and "n_logs" rows with a defined menstruation-aligned cycle
# day. NOTE(review): `TB` is a global defined upstream — presumably the
# tender-breasts type label(s); confirm.
j = which((days$cycle_id_m %in% cycle_ids) & (days$type %in% c(TB,"n_logs")) & (!is.na(days$cycleday_m_D)))
if(length(j)>0){
days_selected_cycles = rbind(days_selected_cycles, days[j,])
}
}## 1
## Loading objects:
## days
## 2
## Loading objects:
## days
## 3
## Loading objects:
## days
## 4
## Loading objects:
## days
## 5
## Loading objects:
## days
## 6
## Loading objects:
## days
## 7
## Loading objects:
## days
## 8
## Loading objects:
## days
## 9
## Loading objects:
## days
## 10
## Loading objects:
## days
## 11
## Loading objects:
## days
## 12
## Loading objects:
## days
## 13
## Loading objects:
## days
## 14
## Loading objects:
## days
## 15
## Loading objects:
## days
## 16
## Loading objects:
## days
## 17
## Loading objects:
## days
## 18
## Loading objects:
## days
## 19
## Loading objects:
## days
## 20
## Loading objects:
## days
## 21
## Loading objects:
## days
## 22
## Loading objects:
## days
## 23
## Loading objects:
## days
## 24
## Loading objects:
## days
## 25
## Loading objects:
## days
## 26
## Loading objects:
## days
## 27
## Loading objects:
## days
## 28
## Loading objects:
## days
## 29
## Loading objects:
## days
## 30
## Loading objects:
## days
## 31
## Loading objects:
## days
## 32
## Loading objects:
## days
## 33
## Loading objects:
## days
## 34
## Loading objects:
## days
## 35
## Loading objects:
## days
## 36
## Loading objects:
## days
## 37
## Loading objects:
## days
## 38
## Loading objects:
## days
## 39
## Loading objects:
## days
## 40
## Loading objects:
## days
## 41
## Loading objects:
## days
## 42
## Loading objects:
## days
## 43
## Loading objects:
## days
## 44
## Loading objects:
## days
## 45
## Loading objects:
## days
## 46
## Loading objects:
## days
## 47
## Loading objects:
## days
## 48
## Loading objects:
## days
## 49
## Loading objects:
## days
## 50
## Loading objects:
## days
## 51
## Loading objects:
## days
## 52
## Loading objects:
## days
## 53
## Loading objects:
## days
## 54
## Loading objects:
## days
## 55
## Loading objects:
## days
## 56
## Loading objects:
## days
## 57
## Loading objects:
## days
## 58
## Loading objects:
## days
## 59
## Loading objects:
## days
## 60
## Loading objects:
## days
## 61
## Loading objects:
## days
## 62
## Loading objects:
## days
## 63
## Loading objects:
## days
## 64
## Loading objects:
## days
## 65
## Loading objects:
## days
## 66
## Loading objects:
## days
## 67
## Loading objects:
## days
## 68
## Loading objects:
## days
## 69
## Loading objects:
## days
## 70
## Loading objects:
## days
## 71
## Loading objects:
## days
## 72
## Loading objects:
## days
## 73
## Loading objects:
## days
## 74
## Loading objects:
## days
## 75
## Loading objects:
## days
## 76
## Loading objects:
## days
## 77
## Loading objects:
## days
## 78
## Loading objects:
## days
## 79
## Loading objects:
## days
## 80
## Loading objects:
## days
## 81
## Loading objects:
## days
## 82
## Loading objects:
## days
## 83
## Loading objects:
## days
## 84
## Loading objects:
## days
## 85
## Loading objects:
## days
## 86
## Loading objects:
## days
## 87
## Loading objects:
## days
## 88
## Loading objects:
## days
## 89
## Loading objects:
## days
## 90
## Loading objects:
## days
## 91
## Loading objects:
## days
## 92
## Loading objects:
## days
## 93
## Loading objects:
## days
## 94
## Loading objects:
## days
## 95
## Loading objects:
## days
## 96
## Loading objects:
## days
## 97
## Loading objects:
## days
## 98
## Loading objects:
## days
## 99
## Loading objects:
## days
## 100
## Loading objects:
## days
## 101
## Loading objects:
## days
## 102
## Loading objects:
## days
## 103
## Loading objects:
## days
## 104
## Loading objects:
## days
## 105
## Loading objects:
## days
## 106
## Loading objects:
## days
## 107
## Loading objects:
## days
## 108
## Loading objects:
## days
## 109
## Loading objects:
## days
## 110
## Loading objects:
## days
## 111
## Loading objects:
## days
## 112
## Loading objects:
## days
## 113
## Loading objects:
## days
## 114
## Loading objects:
## days
## 115
## Loading objects:
## days
## 116
## Loading objects:
## days
## 117
## Loading objects:
## days
## 118
## Loading objects:
## days
## 119
## Loading objects:
## days
## 120
## Loading objects:
## days
## 121
## Loading objects:
## days
## 122
## Loading objects:
## days
## 123
## Loading objects:
## days
## 124
## Loading objects:
## days
## 125
## Loading objects:
## days
## 126
## Loading objects:
## days
## 127
## Loading objects:
## days
## 128
## Loading objects:
## days
## 129
## Loading objects:
## days
## 130
## Loading objects:
## days
## 131
## Loading objects:
## days
## 132
## Loading objects:
## days
## 133
## Loading objects:
## days
## 134
## Loading objects:
## days
## 135
## Loading objects:
## days
## 136
## Loading objects:
## days
## 137
## Loading objects:
## days
## 138
## Loading objects:
## days
## 139
## Loading objects:
## days
## 140
## Loading objects:
## days
## 141
## Loading objects:
## days
## 142
## Loading objects:
## days
## 143
## Loading objects:
## days
## 144
## Loading objects:
## days
## 145
## Loading objects:
## days
## 146
## Loading objects:
## days
## 147
## Loading objects:
## days
## 148
## Loading objects:
## days
## 149
## Loading objects:
## days
## 150
## Loading objects:
## days
## 151
## Loading objects:
## days
## 152
## Loading objects:
## days
## 153
## Loading objects:
## days
## 154
## Loading objects:
## days
## 155
## Loading objects:
## days
## 156
## Loading objects:
## days
## 157
## Loading objects:
## days
## 158
## Loading objects:
## days
## 159
## Loading objects:
## days
## 160
## Loading objects:
## days
## 161
## Loading objects:
## days
## 162
## Loading objects:
## days
## 163
## Loading objects:
## days
## 164
## Loading objects:
## days
## 165
## Loading objects:
## days
## 166
## Loading objects:
## days
## 167
## Loading objects:
## days
## 168
## Loading objects:
## days
## 169
## Loading objects:
## days
## 170
## Loading objects:
## days
## 171
## Loading objects:
## days
## 172
## Loading objects:
## days
## 173
## Loading objects:
## days
## 174
## Loading objects:
## days
## 175
## Loading objects:
## days
## 176
## Loading objects:
## days
## 177
## Loading objects:
## days
## 178
## Loading objects:
## days
## 179
## Loading objects:
## days
## 180
## Loading objects:
## days
## 181
## Loading objects:
## days
## 182
## Loading objects:
## days
## 183
## Loading objects:
## days
## 184
## Loading objects:
## days
## 185
## Loading objects:
## days
## 186
## Loading objects:
## days
## 187
## Loading objects:
## days
## 188
## Loading objects:
## days
## 189
## Loading objects:
## days
## 190
## Loading objects:
## days
## 191
## Loading objects:
## days
## 192
## Loading objects:
## days
## 193
## Loading objects:
## days
## 194
## Loading objects:
## days
## 195
## Loading objects:
## days
## 196
## Loading objects:
## days
## 197
## Loading objects:
## days
## 198
## Loading objects:
## days
## 199
## Loading objects:
## days
## 200
## Loading objects:
## days
## 201
## Loading objects:
## days
## 202
## Loading objects:
## days
## 203
## Loading objects:
## days
## 204
## Loading objects:
## days
## 205
## Loading objects:
## days
## 206
## Loading objects:
## days
## 207
## Loading objects:
## days
## 208
## Loading objects:
## days
## 209
## Loading objects:
## days
## 210
## Loading objects:
## days
## 211
## Loading objects:
## days
## 212
## Loading objects:
## days
## 213
## Loading objects:
## days
## 214
## Loading objects:
## days
## 215
## Loading objects:
## days
## 216
## Loading objects:
## days
## 217
## Loading objects:
## days
## 218
## Loading objects:
## days
## 219
## Loading objects:
## days
## 220
## Loading objects:
## days
## 221
## Loading objects:
## days
## 222
## Loading objects:
## days
## 223
## Loading objects:
## days
## 224
## Loading objects:
## days
## 225
## Loading objects:
## days
## 226
## Loading objects:
## days
## 227
## Loading objects:
## days
## 228
## Loading objects:
## days
## 229
## Loading objects:
## days
## 230
## Loading objects:
## days
toc()## 621.417 sec elapsed
days = days_selected_cycles
save(days, file = paste0(IO$tmp_data,"days_selected_cycles.Rdata"))Transform these data in wide format
# load( file = paste0(IO$tmp_data,"days_selected_cycles.Rdata"), verbose = TRUE)
# dim(days)
# One row per cycle, one "n." column per cycle day (see reshape_and_impute).
d_wide = reshape_and_impute(days)
d_wide_with_tracking = d_wide
# NOTE(review): -1 presumably encodes "app used but no symptom reported" (or
# an imputation flag) — zeroed here; confirm against reshape_and_impute.
d_wide[d_wide == -1] = 0
m = match(d_wide$cycle_id_m, cycles_m$cycle_id_m)
d_wide$stretch_id = cycles_m$stretch_id[m]
d_wide$cycle_nb_m = cycles_m$cycle_nb_m[m]
write_feather(d_wide, path = paste0(IO$tmp_data, "d_wide_without_tracking_for_cvdt.feather"))tic()
# Sliding-window cvdt metrics: for each stretch, every window of
# n_consecutive_cycles cycles gets one set of duration/timing/variability/
# cyclicity values plus the windowed average symptom profile per cycle day.
average_profiles_with_cvdt = data.frame()
# NOTE(review): only the first 500 stretches are processed here (exploration
# subset) — drop the [1:500] to run on all stretches.
for(stretch_id in unique(d_wide$stretch_id)[1:500]){
j = which(d_wide$stretch_id == stretch_id)
XX = as.matrix(d_wide[j,grep("n\\.",colnames(d_wide))]) # cycle-day columns only
average_profiles_this_stretch = data.frame()
# seq_len() instead of 1:(...) so that a stretch shorter than
# n_consecutive_cycles yields no windows, rather than looping over c(1, 0)
# and indexing out of bounds.
for(i in seq_len(nrow(XX)-n_consecutive_cycles+1)){
X = XX[i:(i+n_consecutive_cycles-1),]
average_profiles_this_stretch = rbind(average_profiles_this_stretch,
data.frame(stretch_id = stretch_id,
cycle_nb = d_wide$cycle_nb_m[j[1]+i-1], # first cycle of the window
duration = cvdt_duration(X),
timing = cvdt_timing(X),
variability = cvdt_variability(X),
cyclicity = cvdt_cyclicity(X),
cycleday_m_D = -par$D:par$Df,
average_profile = apply(X,2,mean))
)
}
average_profiles_with_cvdt = rbind(average_profiles_with_cvdt,
average_profiles_this_stretch)
}
toc()## 14.137 sec elapsed
# Attach the birth-control group and ids, keep one row per (stretch, window),
# and save both the long table and the per-window summary table.
average_profiles_with_cvdt$BC = cycles_m$BC[match(average_profiles_with_cvdt$stretch_id, cycles_m$stretch_id)]
columns_for_unique = c("stretch_id","cycle_nb","duration","timing","variability","cyclicity","BC")
cvdt_stretches = unique(average_profiles_with_cvdt[,columns_for_unique])
cvdt_stretches$user_id = cycles_m$user_id[match(cvdt_stretches$stretch_id, cycles_m$stretch_id)]
cvdt_stretches$cycle_id_m = paste0(cvdt_stretches$user_id, "_", cvdt_stretches$cycle_nb)
write_feather(average_profiles_with_cvdt, path = paste0(IO$tmp_data, "average_profiles_with_cvdt.feather"))
# Distributions of each cvdt metric by birth-control group, then pairwise
# scatter plots and a 3D view (statements below are fused with knitted
# output lines).
write_feather(cvdt_stretches, path = paste0(IO$tmp_data, "cvdt_stretches.feather"))ggplot(cvdt_stretches, aes(x = cyclicity, fill = BC))+
geom_histogram(aes(y = ..density..),binwidth = 0.05, alpha = 0.5, position = "identity")+
scale_fill_manual(values = cols$BC)ggplot(cvdt_stretches, aes(x = variability, fill = BC))+
geom_histogram(aes(y = ..density..),binwidth = 0.5, alpha = 0.5, position = "identity")+
scale_fill_manual(values = cols$BC)ggplot(cvdt_stretches, aes(x = duration, fill = BC))+
geom_histogram(aes(y = ..density..),bins = 26, alpha = 0.5, position = "identity")+
scale_fill_manual(values = cols$BC)ggplot(cvdt_stretches, aes(x = timing, fill = BC))+
geom_histogram(aes(y = ..density..),bins = 26, alpha = 0.5, position = "identity")+
scale_fill_manual(values = cols$BC)## Warning: Removed 12 rows containing non-finite values (stat_bin).
ggplot(cvdt_stretches, aes(y = cyclicity, x = duration, col = BC))+
geom_point(alpha = 0.3)+
scale_color_manual(values = cols$BC)ggplot(cvdt_stretches, aes(y = variability, x = cyclicity, col = BC))+
geom_point(alpha = 0.3)+
scale_color_manual(values = cols$BC)ggplot(cvdt_stretches, aes(y = variability, x = duration, col = BC))+
geom_point(alpha = 0.3)+
scale_color_manual(values = cols$BC)ggplot(cvdt_stretches, aes(y = timing, x = cyclicity, col = BC))+
geom_point(alpha = 0.3)+
scale_color_manual(values = cols$BC)## Warning: Removed 12 rows containing missing values (geom_point).
plot_ly(cvdt_stretches, x = ~cyclicity, y = ~duration, z = ~timing,
color = ~variability,
type = "scatter3d", mode = "markers",
marker = list(size = 2))## Warning: Ignoring 12 observations
# Correlations and PCA of the four cvdt metrics; rows with undefined timing
# (windows in which the symptom is never reported) are dropped via j.
j = which(!is.na(cvdt_stretches$timing))
cvdt_col = c("duration","timing","variability","cyclicity")
cor(cvdt_stretches[j,cvdt_col ])## duration timing variability cyclicity
## duration 1.00000000 -0.2234107 0.5761576 -0.06540099
## timing -0.22341070 1.0000000 -0.2488482 0.36553358
## variability 0.57615757 -0.2488482 1.0000000 -0.13315037
## cyclicity -0.06540099 0.3655336 -0.1331504 1.00000000
pca = prcomp(cvdt_stretches[j,cvdt_col],
scale = TRUE, center = TRUE)
# Scree plot, then a biplot-style view of the first two components.
ggplot(data.frame(dim = 1:4, sdev = pca$sdev), aes(x = dim, y = sdev))+
geom_bar(stat = "identity")pca_points = as.data.frame(pca$x)
pca_points$BC = cvdt_stretches$BC[j]
pca_points$duration = cvdt_stretches$duration[j]
pca_points$timing = cvdt_stretches$timing[j]
pca_vec = as.data.frame(pca$rotation)
pca_vec$attr = rownames(pca_vec)
S = 5 # scaling factor for the loading arrows
ggplot(pca_points, aes(x = PC1, y = PC2))+ coord_fixed()+
geom_point( alpha = 0.3, size = 0.5)+
geom_segment(data = pca_vec, aes(x = 0, y = 0, xend = S*PC1, yend = S*PC2, col = attr), size = 1)+
facet_grid(BC ~ .)plot_ly(pca_points, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~BC,
type = "scatter3d", mode = "markers",
marker = list(size = 2))o = order(cvdt_stretches$cyclicity, decreasing = TRUE)
# NOTE(review): the sort by cyclicity (via `o` from the previous line) is
# immediately undone by the random shuffle below — presumably to randomize
# plotting order; confirm whether the sort is still needed.
cvdt_stretches = cvdt_stretches[o,]
cvdt_stretches = cvdt_stretches[sample(1:nrow(cvdt_stretches)),]
# Timing vs duration map: one segment per stretch spanning its symptom
# window, colored by cyclicity and sized by variability.
ggplot(cvdt_stretches[cvdt_stretches$BC %in% par$BC_dict$name,], aes(x = timing, y = duration, col = cyclicity))+ # alpha = variability,
geom_vline(xintercept = 0, col = "gray80", size = 2)+
geom_hline(yintercept = 0, col = "gray40")+
geom_segment(aes(x = timing - duration/2, xend = timing + duration/2, yend = duration, alpha = cyclicity), size = 1.2)+
geom_point(aes(size = variability), alpha = 0.7)+
scale_color_gradient(low = "gray80", high = "mediumslateblue")+
scale_x_continuous(breaks = par$x.axis)+
scale_alpha(range = c(0,0.2))+
facet_grid(BC ~ .)## Warning: Removed 12 rows containing missing values (geom_segment).
## Warning: Removed 12 rows containing missing values (geom_point).
# Heatmap-style overview of all stretches' average profiles, ordered by
# cyclicity then duration.
o = order(average_profiles_with_cvdt$cyclicity, average_profiles_with_cvdt$duration, decreasing = TRUE)
average_profiles_with_cvdt = average_profiles_with_cvdt[o,]
ggplot(average_profiles_with_cvdt, aes(x = cycleday_m_D, y = stretch_id, col = BC, alpha = average_profile))+ # alpha = variability,
geom_vline(xintercept = 0, col = "gray80", size = 2)+
geom_point()+
scale_color_manual(values = cols$BC)+
scale_x_continuous(breaks = par$x.axis)+
scale_alpha(range = c(0,1))+
# (fused with the next section: flags used to pick example stretches)
facet_grid( . ~ BC , scale = "free_y")duration_4 = (round(cvdt_stretches$duration) %in% 3:5)
# Flags over cvdt_stretches used to hand-pick illustrative example stretches
# (short/long duration, low/mid/high cyclicity and variability, timing bins).
duration_20 = (round(cvdt_stretches$duration) %in% 19:21)
cyclicity_high = (round(cvdt_stretches$cyclicity,1) >= quantile(cvdt_stretches$cyclicity,p = 0.95))
cyclicity_0 = (round(cvdt_stretches$cyclicity,2) == 0)
cyclicity_low = (round(cvdt_stretches$cyclicity,1) <= 0.2)
cyclicity_mid = (round(cvdt_stretches$cyclicity,1) == 0.5)
variability_high = (round(cvdt_stretches$variability,2) >= quantile(cvdt_stretches$variability,p = 0.95))
variability_mid = (round(cvdt_stretches$variability,2) %in%
seq(quantile(round(cvdt_stretches$variability,2),p = 0.30),quantile(round(cvdt_stretches$variability,2),p = 0.80),by = 0.01))
variability_low = (cvdt_stretches$variability <= quantile(cvdt_stretches$variability,p = 0.20))
timing_3 = (round(cvdt_stretches$timing) %in% -3:-2)
timing_7 = (round(cvdt_stretches$timing) %in% -8:-7)
timing_0 = (round(cvdt_stretches$timing) %in% -1:0)
# Take the first stretch matching each combination of flags; some
# combinations may have no match (NA), removed below.
ex = which(duration_4 & cyclicity_low & variability_mid)[1]
ex = c(ex, which( cyclicity_mid & variability_mid & timing_3)[1])
ex = c(ex, which(duration_4 & cyclicity_high & variability_high)[1])
ex = c(ex, which(duration_4 & cyclicity_mid & variability_mid & timing_0)[1])
ex = c(ex, which( cyclicity_high & variability_mid)[10])
ex = c(ex, which(duration_20 & cyclicity_mid)[1])
ex = c(ex, which(duration_20 & cyclicity_low)[1])
ex = c(ex, which(!is.na(match(cvdt_stretches$stretch_id, cycles_m$stretch_id[which(cycles_m$user_id == special_pill_user)])))[1])
if(any(is.na(ex))){ex = ex[-which(is.na(ex))]}
length(ex)## [1] 6
stretch_id_ex = cvdt_stretches$stretch_id[ex]
# NOTE(review): the window length 6 (each = 6, 0:5) is hard-coded here —
# presumably it should be n_consecutive_cycles.
cycle_ids_ex = paste0(rep(cvdt_stretches$user_id[ex],each = 6),"_",rep(cvdt_stretches$cycle_nb[ex], each = 6) + rep(0:5, length(ex)) )
save(cycle_ids_ex, file = paste0(IO$tmp_data, "cycle_ids_ex_v3.Rdata"))
# Long format for the example cycles, keeping the tracking information
# (the version with -1 values) for ggplot_imputed_TB.
k = which(d_wide_with_tracking$cycle_id_m %in% cycle_ids_ex )
d = melt(d_wide_with_tracking[k,], id.vars = c("user_id","cycle_id_m"))
colnames(d)[match(c("variable","value"),colnames(d))] = c("cycleday_m_D","tender_breasts")
# cycle-day columns are named like "n.7" / "n..18" — the gsub calls strip the
# "n." prefix and turn the remaining dot into a minus sign.
d$cycleday_m_D = as.numeric(gsub("\\.","-",gsub("n\\.","",d$cycleday_m_D)))
d$cycle_nb_m = cycles_m$cycle_nb_m[match(d$cycle_id_m, cycles_m$cycle_id_m)]
d$user_id = factor(d$user_id,levels = cvdt_stretches$user_id[ex])
d$stretch_id = cvdt_stretches$stretch_id[match( d$cycle_id_m, cvdt_stretches$cycle_id_m)]
#d$user_id = factor(d$user_id,levels = unique(d$user_id))
ggplot_imputed_TB(sel_d = d, facet_grid_y = "user_id")average_profiles_with_cvdt$s_id = paste0(average_profiles_with_cvdt$stretch_id,"_",average_profiles_with_cvdt$cycle_nb)
# Average profiles of the selected example windows, one facet per stretch,
# followed (fused) by a table of their rounded cvdt metrics.
k = which(average_profiles_with_cvdt$s_id %in% paste0(cvdt_stretches$stretch_id[ex],"_", cvdt_stretches$cycle_nb[ex]))
selected_average_profiles_with_cvdt = average_profiles_with_cvdt[k,]
selected_average_profiles_with_cvdt$stretch_id = factor(selected_average_profiles_with_cvdt$stretch_id, levels = unique(cvdt_stretches$stretch_id[ex]))
ggplot(selected_average_profiles_with_cvdt,aes(x = cycleday_m_D, y = average_profile, col = stretch_id) )+
geom_point()+
geom_line()+
guides(col = FALSE)+
facet_grid(stretch_id ~ .)data.frame(cvdt_stretches$stretch_id[ex],round(cvdt_stretches[ex,cvdt_col],2))## cvdt_stretches.stretch_id.ex. duration timing
## n..185130 83120f9b11c4ee1a056a63485411a019ab04e83c_s1 4.59 -4.90
## n..181360 391aeeff3cd59b86a0c4f0f388779aafab48631e_s2 5.47 -3.11
## n..18876 511fce665cfb11f78a566b190e98e13864638ff8_s4 3.67 -0.45
## n..18874 43315b78b9ba4eeafd2c4f1a04e9d5441ac00a0f_s1 6.19 -3.00
## n..18657 9072ba048e0dce2ea3ed24b25ddfa23417aaacdd_s1 20.75 -3.77
## n..181611 d51e275e982845648f63ec06d0e16ef46466b800_s2 20.73 -5.47
## variability cyclicity
## n..185130 2.77 0.00
## n..181360 3.50 0.47
## n..18876 2.56 0.53
## n..18874 3.45 0.90
## n..18657 4.43 0.47
## n..181611 3.67 0.00